import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
import os
# Verify the expected input file is present before any processing.
file_path = r"/content/spotify.csv"
if not os.path.exists(file_path):
    print("File NOT found.")
else:
    print("File found")
File found!
# Parse the Wiki-Vote edge list: one "fromNode toNode" pair per line,
# skipping blank lines and '#' comment lines.
# Fix: removed the unused `node = []` list that was never populated or read.
file_path = "/content/Wiki-Vote.txt"  # change this to your actual file path
edges = []  # will store tuples of the form (fromNode, toNode)
with open(file_path, "r", encoding="utf-8") as f:
    for line in f:
        # Remove leading/trailing whitespace
        line = line.strip()
        # Skip empty lines or lines starting with '#'
        if not line or line.startswith('#'):
            continue
        # Split into parts by whitespace
        parts = line.split()
        # Only lines holding exactly two integer ids are edges
        if len(parts) == 2:
            from_node, to_node = map(int, parts)
            edges.append((from_node, to_node))
print("Number of edges:", len(edges))
print("First 5 edges:", edges[:5])
Number of edges: 103689 First 5 edges: [(30, 1412), (30, 3352), (30, 5254), (30, 5543), (30, 7478)]
# Load the same edge list directly into a directed NetworkX graph.
wiki_vote_path = "/content/Wiki-Vote.txt"
G_wiki = nx.read_edgelist(
    wiki_vote_path, create_using=nx.DiGraph(), comments="#", nodetype=int
)
# Report basic size statistics.
num_nodes = G_wiki.number_of_nodes()
num_edges = G_wiki.number_of_edges()
print(f"Number of nodes {num_nodes},")
print(f"Number of edges {num_edges},")
Number of nodes 7115, Number of edges 103689,
# Visualize the Wiki-Vote directed graph with a force-directed layout.
G = G_wiki
pos = nx.spring_layout(G, seed=42)
# Figure size and title used by later plotting cells as well.
size = 20
title = "Wikipedia Votes Network (Adminship Voting)"
fig, ax = plt.subplots(figsize=(size, size - 4))
# Nodes: one fixed fill color, grey outlines.
node_opts = {"edgecolors": "tab:gray", "node_size": 25, "alpha": 0.75}
nx.draw_networkx_nodes(G, pos, node_color=["#76c8c8"], **node_opts)
# Edges: faint grey arrows to show vote direction.
nx.draw_networkx_edges(
    G, pos, edge_color="gray", width=0.5, alpha=0.3, arrows=True, arrowsize=6
)
plt.tight_layout()
plt.title(title, fontsize=20)
plt.axis("off")
plt.show()
Network Analysis: Degree Distribution Analysis
def degree_dist(G, title):
    """Plot a 15-bin histogram of the degree distribution of G."""
    plt.hist([d for _, d in G.degree()], bins=15, edgecolor='black')
    plt.xlabel("Degree")
    plt.ylabel("Frequency")
    plt.title(title)
    plt.show()
degree_dist(G, "Degree Distribution of Real Graph")
def concomp(G):
    """Report weak connectivity of G and return its largest weak component
    as an independent subgraph copy."""
    components = list(nx.weakly_connected_components(G))
    largest_cc = max(components, key=len)
    print(f"Number of weakly connected components: {len(components)}")
    print(f"Largest component size: {len(largest_cc)}")
    return G.subgraph(largest_cc).copy()
largest_subgraph = concomp(G)
Number of weakly connected components: 24 Largest component size: 7066
# Path metrics are only defined on a connected graph, so use the undirected
# view of the largest weak component.
UG = largest_subgraph.to_undirected()
if not nx.is_connected(UG):
    print("Subgraph is not connected. Cannot compute path-based metrics.")
else:
    avg_path_len = nx.average_shortest_path_length(UG)
    diameter = nx.diameter(UG)
    print(f"Average shortest path length: {avg_path_len}")
    print(f"Diameter: {diameter}")
Average shortest path length: 3.247509990226615 Diameter: 7
# Global clustering coefficient and edge density of the full graph.
average_clustering = nx.average_clustering(G)
density = nx.density(G)
print(f"Average clustering coefficient : {average_clustering}")
print(f"Network Density: {density}")
Average clustering coefficient : 0.5085087124404498 Network Density: 0.001981599433828733
Centrality analysis
Degree Centrality
def degree_centrality(G):
    """Print the ten nodes with the highest degree centrality."""
    ranked = sorted(nx.degree_centrality(G).items(),
                    key=lambda kv: kv[1], reverse=True)
    print("Top 10 most active nodes by degree centrality :")
    for n, c in ranked[:10]:
        print(f"node {n}, Centrality: {c:.10f}")
degree_centrality(G)
Top 10 most active nodes by degree centrality : node 2565, Centrality: 0.1507430998 node 766, Centrality: 0.1094125973 node 11, Centrality: 0.1051663128 node 1549, Centrality: 0.1047416844 node 457, Centrality: 0.1036093418 node 1166, Centrality: 0.0973814579 node 2688, Centrality: 0.0874734607 node 1374, Centrality: 0.0754423213 node 1151, Centrality: 0.0731776362 node 5524, Centrality: 0.0700636943
Betweenness Centrality
def between_centrality(G):
    """Print the ten nodes with the highest betweenness centrality."""
    betweenness_centrality = nx.betweenness_centrality(G)
    top_betweenness = sorted(betweenness_centrality.items(),
                             key=lambda x: x[1], reverse=True)[:10]
    # Fix: heading previously said "degree centrality" (copy-paste error).
    print("Top 10 most active nodes by betweenness centrality :")
    for node, centrality in top_betweenness:
        print(f"node {node}, Centrality: {centrality:.10f}")
between_centrality(G)
Top 10 most active nodes by degree centrality : node 2565, Centrality: 0.0621102429 node 11, Centrality: 0.0361871579 node 457, Centrality: 0.0359790207 node 4037, Centrality: 0.0289607164 node 1549, Centrality: 0.0264972305 node 766, Centrality: 0.0257051716 node 1166, Centrality: 0.0248066639 node 15, Centrality: 0.0203233473 node 1374, Centrality: 0.0193799230 node 2237, Centrality: 0.0152684815
Closeness
def closeness_centrality(G):
    """Print the ten nodes with the highest closeness centrality."""
    closeness = nx.closeness_centrality(G)
    top_closeness = sorted(closeness.items(),
                           key=lambda x: x[1], reverse=True)[:10]
    # Fix: heading previously said "degree centrality" (copy-paste error).
    print("Top 10 most active nodes by closeness centrality :")
    for node, centrality in top_closeness:
        print(f"node {node}, Centrality: {centrality:.10f}")
closeness_centrality(G)
Top 10 most active nodes by degree centrality : node 2565, Centrality: 0.4907954151 node 766, Centrality: 0.4701537233 node 457, Centrality: 0.4698410587 node 1549, Centrality: 0.4690923577 node 1166, Centrality: 0.4689055552 node 1374, Centrality: 0.4532623340 node 11, Centrality: 0.4519286126 node 1151, Centrality: 0.4471235998 node 2688, Centrality: 0.4448992443 node 2485, Centrality: 0.4375154818
Eigenvector centrality
def eigen_centrality(G):
    """Print the ten nodes with the highest eigenvector centrality."""
    eigenvector_centrality = nx.eigenvector_centrality(G)
    top_eigenvector = sorted(eigenvector_centrality.items(),
                             key=lambda x: x[1], reverse=True)[:10]
    # Fix: heading previously said "degree centrality" (copy-paste error).
    print("Top 10 most active nodes by eigenvector centrality :")
    for node, centrality in top_eigenvector:
        print(f"node {node}, Centrality: {centrality:.10f}")
eigen_centrality(G)
Top 10 most active nodes by degree centrality : node 2565, Centrality: 0.1576876546 node 766, Centrality: 0.1301506260 node 1549, Centrality: 0.1293987753 node 1166, Centrality: 0.1195107407 node 2688, Centrality: 0.1100709104 node 457, Centrality: 0.1100008669 node 3352, Centrality: 0.0917856212 node 11, Centrality: 0.0895923462 node 1151, Centrality: 0.0871942492 node 1374, Centrality: 0.0869397259
Page rank
def pagerank_centrality(G):
    """Print the ten nodes with the highest PageRank score."""
    pagerank = nx.pagerank(G)
    top_pagerank = sorted(pagerank.items(),
                          key=lambda x: x[1], reverse=True)[:10]
    # Fix: heading previously said "degree centrality" (copy-paste error).
    print("Top 10 most active nodes by PageRank :")
    for node, centrality in top_pagerank:
        print(f"node {node}, Centrality: {centrality:.10f}")
pagerank_centrality(G)
Top 10 most active nodes by degree centrality : node 2565, Centrality: 0.0043588671 node 11, Centrality: 0.0030333278 node 766, Centrality: 0.0029809543 node 457, Centrality: 0.0029772160 node 4037, Centrality: 0.0029051737 node 1549, Centrality: 0.0028716081 node 1166, Centrality: 0.0026808432 node 2688, Centrality: 0.0023925463 node 15, Centrality: 0.0021808684 node 1374, Centrality: 0.0021427121
Removing random node
def rem_nodes(G, num_remove, title):
    """Remove the given nodes from a copy of G and draw the ten largest
    remaining components, one palette color per component.

    num_remove: iterable of node ids to delete.
    title: plot title.
    """
    G_copy = G.copy()
    G_copy.remove_nodes_from(num_remove)
    # The component notion depends on whether the graph is directed.
    if G.is_directed():
        components = sorted(nx.weakly_connected_components(G_copy),
                            key=len, reverse=True)
    else:
        components = sorted(nx.connected_components(G_copy),
                            key=len, reverse=True)
    color_palette = plt.get_cmap("tab20").colors
    fig, ax = plt.subplots(figsize=(18, 14))
    for i, component in enumerate(components[:10]):  # Top 10 largest components
        subgraph = G_copy.subgraph(component)
        pos = nx.spring_layout(subgraph, k=0.1, seed=42)
        # Fix: wrap the RGB tuple in a list so matplotlib treats it as one
        # color for all nodes; a bare 3-tuple triggers a value-mapping
        # UserWarning from ax.scatter.
        node_color = [color_palette[i % len(color_palette)]]
        nx.draw_networkx_nodes(subgraph, pos, node_color=node_color,
                               edgecolors="tab:gray", node_size=25,
                               alpha=0.75, ax=ax)
        nx.draw_networkx_edges(subgraph, pos, width=1.0, alpha=0.5, ax=ax)
    ax.set_title(title, fontsize=16)
    ax.axis("off")
    plt.show()
# Example usage
remove = np.random.choice(list(G.nodes()), size=100, replace=False).tolist()
rem_nodes(G, remove, "Effect of Removing Random Nodes")
/usr/local/lib/python3.11/dist-packages/networkx/drawing/nx_pylab.py:457: UserWarning: *c* argument looks like a single numeric RGB or RGBA sequence, which should be avoided as value-mapping will have precedence in case its length matches with *x* & *y*. Please use the *color* keyword-argument or provide a 2D array with a single row if you intend to specify the same RGB or RGBA value for all points. node_collection = ax.scatter(
removing high centrality node
def rem_nodes(G):
    """Delete the 100 highest-PageRank nodes from a copy of G and draw the
    surviving components, one matplotlib cycle color ("C0", "C1", ...) per
    component.

    NOTE(review): this redefines the earlier rem_nodes(G, num_remove, title).
    """
    pagerank = nx.pagerank(G)
    top = sorted(pagerank.items(), key=lambda x: x[1], reverse=True)[:100]
    G_copy = G.copy()
    G_copy.remove_nodes_from([node for node, _ in top])
    # Fix: nx.connected_components raises NetworkXNotImplemented on a
    # directed graph; use weak components when G_copy is directed.
    if G_copy.is_directed():
        components = list(nx.weakly_connected_components(G_copy))
    else:
        components = list(nx.connected_components(G_copy))
    if not components:
        return "No connected components"
    size = 20
    fig, ax = plt.subplots(figsize=(size, size - 4))
    title = "Effect of removing nodes with highest page rank centrality"
    options = {"edgecolors": "tab:gray", "node_size": 25, "alpha": 0.75}
    for i, component in enumerate(components):
        subgraph = G_copy.subgraph(component)
        pos = nx.spring_layout(subgraph, k=0.1, seed=42)
        nx.draw_networkx_nodes(subgraph, pos, node_color=f"C{i}", **options)
        nx.draw_networkx_edges(subgraph, pos, width=1.0, alpha=0.5)
    ax.set_title(title)
    plt.show()
rem_nodes(G)
Comparison with Random Graphs
Erdos Renyi
# Build synthetic comparison graphs matched to the real network's size.
n = G.number_of_nodes()
m = G.number_of_edges()
p = m / (n * (n - 1))   # directed density, so ER gets ~m edges in expectation
k = max(1, int(m / n))  # mean out-degree; attachment count / ring degree
ER = nx.gnp_random_graph(n, p, directed=True)
BA = nx.barabasi_albert_graph(n, k)
WS = nx.watts_strogatz_graph(n, k, 0.1)
# Fix: many later cells iterate over `graphs`, which was never defined
# anywhere in the visible source; define it where the models are built.
graphs = {
    "Erdős–Rényi": ER,
    "Barabási–Albert": BA,
    "Watts–Strogatz": WS,
}
def get_largest_component_subgraph(G):
    """Return a copy of G restricted to its largest (weak) component."""
    finder = (nx.weakly_connected_components if G.is_directed()
              else nx.connected_components)
    return G.subgraph(max(finder(G), key=len)).copy()
# Extract largest components
wiki_sub = get_largest_component_subgraph(G)
ER_sub = get_largest_component_subgraph(ER)
BA_sub = get_largest_component_subgraph(BA)
WS_sub = get_largest_component_subgraph(WS)
# Plotting
# Draw the real network next to the three synthetic models in a 2x2 grid.
fig, axs = plt.subplots(2, 2, figsize=(14, 12))
panels = [
    ("Wiki-Vote", wiki_sub, axs[0, 0]),
    ("Erdős–Rényi", ER_sub, axs[0, 1]),
    ("Barabási–Albert", BA_sub, axs[1, 0]),
    ("Watts–Strogatz", WS_sub, axs[1, 1]),
]
for panel_title, graph, ax in panels:
    ax.set_title(panel_title)
    nx.draw(graph, ax=ax, node_size=20, edge_color='gray', with_labels=False)
plt.suptitle("Comparison of Real and Synthetic Network Models", fontsize=16)
plt.tight_layout(rect=[0, 0, 1, 0.96])
plt.show()
Average degree for all graphs
def compute_average_degree(graph):
    """Return (mean degree rounded to 2 dp, highest-degree node, its degree).

    graph: any object whose .degree() yields (node, degree) pairs.
    """
    degrees = dict(graph.degree())
    average = sum(degrees.values()) / len(degrees)
    top_node = max(degrees, key=degrees.get)
    top_value = degrees[top_node]
    # Fix: previously returned the undefined name `highest_value`,
    # which raised NameError on every call.
    return round(average, 2), top_node, top_value
def average_degree_report():
    """Tabulate average degree and top-degree node for each synthetic graph."""
    graph_dict = {
        "Erdős–Rényi": ER,
        "Barabási–Albert": BA,
        "Watts–Strogatz": WS
    }
    results = []
    for name, graph in graph_dict.items():
        # Fix: the unpacked value was named `Highest_value` while the
        # f-string read a stale module-level `highest_value`, so the table
        # showed an unrelated number; use one consistent local name.
        avg_degree, top_node, highest_value = compute_average_degree(graph)
        results.append({
            "Graph": name,
            "Average Degree": avg_degree,
            "Top Degree Node": f"{top_node} ({highest_value})"
        })
    df = pd.DataFrame(results).set_index("Graph")
    print(df)
# Run the report
average_degree_report()
Average Degree Top Degree Node Graph Erdős–Rényi 28.49 493 (0.7575757575757576) Barabási–Albert 27.94 19 (0.7575757575757576) Watts–Strogatz 14.00 371 (0.7575757575757576)
def longest_shortest_path(G):
    """Return the diameter of G's largest connected component, treating
    directed graphs as undirected."""
    H = G.to_undirected() if G.is_directed() else G
    if not nx.is_connected(H):
        # Restrict to the giant component so the diameter is defined.
        giant = max(nx.connected_components(H), key=len)
        H = H.subgraph(giant).copy()
    return nx.diameter(H)
# Collect each graph's diameter, recording failures as text instead of raising.
W = []
for name, G in graphs.items():
    try:
        entry = {"Graph": name,
                 "Diameter (Longest Shortest Path)": longest_shortest_path(G)}
    except Exception as e:
        entry = {"Graph": name,
                 "Diameter (Longest Shortest Path)": f"Error: {str(e)}"}
    W.append(entry)
# Display as DataFrame
df_diameter = pd.DataFrame(W).set_index("Graph")
print(df_diameter)
Diameter (Longest Shortest Path) Graph Erdős–Rényi 4 Barabási–Albert 4 Watts–Strogatz 7
Longest shortest path with diameter
import pandas as pd
import networkx as nx
def longest_shortest_path(G):
    """Return (average shortest path length, diameter) of the largest
    connected component of G, viewed as undirected."""
    # Convert directed to undirected for proper path calculations.
    if G.is_directed():
        G = G.to_undirected()
    if not nx.is_connected(G):
        # Restrict to the giant component so path metrics are defined.
        largest_cc = max(nx.connected_components(G), key=len)
        G = G.subgraph(largest_cc).copy()
    return nx.average_shortest_path_length(G), nx.diameter(G)
# Tabulate average shortest path and diameter for every graph.
W = []
for name, G in graphs.items():
    metrics = longest_shortest_path(G)
    W.append(dict(zip(
        ("Graph", "Average Shortest Path", "Diameter"),
        (name, round(metrics[0], 2), metrics[1]),
    )))
df_longest_shortest_path = pd.DataFrame(W).set_index("Graph")
print(df_longest_shortest_path)
Average Shortest Path Diameter Graph Erdős–Rényi 2.93 4 Barabási–Albert 2.82 4 Watts–Strogatz 4.92 7
Clustering For all Graphs
# Average clustering coefficient for each comparison graph.
for name, G in graphs.items():
    print(f"{name} {nx.average_clustering(G)}")
Erdős–Rényi 0.001944590224399168 Barabási–Albert 0.018214373216336132 Watts–Strogatz 0.5085087124404498
Degree Centrality
# Top-10 degree-centrality nodes per graph, formatted "score (node)".
W = {}
for name, G in graphs.items():
    ranked = sorted(nx.degree_centrality(G).items(),
                    key=lambda kv: kv[1], reverse=True)
    W[name] = [f"{score:.4f} ({node})" for node, score in ranked[:10]]
labels = ["Top 1", "Top 2", "Top 3", "Top 4", "Top 5",
          "Top6", "Top7", "Top8", "Top9", "Top10"]
df = pd.DataFrame(W, index=labels).T
print(df)
Top 1 Top 2 Top 3 Top 4 \
Erdős–Rényi 0.0072 (493) 0.0071 (1404) 0.0071 (1564) 0.0069 (5123)
Barabási–Albert 0.0786 (19) 0.0713 (16) 0.0703 (15) 0.0701 (18)
Watts–Strogatz 0.0027 (371) 0.0027 (1316) 0.0025 (190) 0.0025 (750)
Top 5 Top6 Top7 Top8 \
Erdős–Rényi 0.0068 (6364) 0.0067 (3709) 0.0067 (6032) 0.0065 (344)
Barabási–Albert 0.0644 (17) 0.0599 (21) 0.0559 (20) 0.0545 (35)
Watts–Strogatz 0.0025 (1629) 0.0025 (2519) 0.0025 (3083) 0.0025 (3246)
Top9 Top10
Erdős–Rényi 0.0065 (419) 0.0065 (5646)
Barabási–Albert 0.0527 (24) 0.0485 (27)
Watts–Strogatz 0.0025 (3570) 0.0025 (3714)
Closeness Centrality
def top_closeness_nodes(graph, top_n=10):
    """Return the top_n 'score (node)' strings by closeness centrality."""
    ranked = sorted(nx.closeness_centrality(graph).items(),
                    key=lambda kv: kv[1], reverse=True)
    return [f"{score:.4f} ({node})" for node, score in ranked[:top_n]]
def closeness_centrality_report(graphs_dict):
    """Print a table of the top-10 closeness-centrality nodes per graph."""
    top_n = 10
    closeness_data = {name: top_closeness_nodes(graph, top_n)
                      for name, graph in graphs_dict.items()}
    index = [f"Top {i+1}" for i in range(top_n)]
    print(pd.DataFrame(closeness_data, index=index).T)
# Run the report
closeness_centrality_report(graphs)
Top 1 Top 2 Top 3 Top 4 \
Erdős–Rényi 0.3013 (4845) 0.3005 (319) 0.2989 (5863) 0.2988 (493)
Barabási–Albert 0.5120 (19) 0.5090 (15) 0.5089 (16) 0.5063 (18)
Watts–Strogatz 0.2258 (6568) 0.2256 (1405) 0.2251 (3659) 0.2238 (1629)
Top 5 Top 6 Top 7 Top 8 \
Erdős–Rényi 0.2985 (1404) 0.2976 (959) 0.2972 (1564) 0.2970 (5511)
Barabási–Albert 0.5044 (17) 0.4975 (20) 0.4973 (21) 0.4904 (0)
Watts–Strogatz 0.2230 (829) 0.2228 (3714) 0.2221 (3710) 0.2221 (6534)
Top 9 Top 10
Erdős–Rényi 0.2968 (5046) 0.2968 (2864)
Barabási–Albert 0.4902 (27) 0.4891 (35)
Watts–Strogatz 0.2213 (3127) 0.2210 (6905)
# Same closeness ranking as above, built inline rather than via the helpers.
W = {}
for name, G in graphs.items():
    ranking = sorted(nx.closeness_centrality(G).items(),
                     key=lambda kv: kv[1], reverse=True)[:10]
    W[name] = [f"{score:.4f} ({node})" for node, score in ranking]
df = pd.DataFrame(W, index=["Top 1", "Top 2", "Top 3", "Top 4", "Top 5",
                            "Top6", "Top7", "Top8", "Top9", "Top10"]).T
print(df)
Top 1 Top 2 Top 3 Top 4 \
Erdős–Rényi 0.3013 (4845) 0.3005 (319) 0.2989 (5863) 0.2988 (493)
Barabási–Albert 0.5120 (19) 0.5090 (15) 0.5089 (16) 0.5063 (18)
Watts–Strogatz 0.2258 (6568) 0.2256 (1405) 0.2251 (3659) 0.2238 (1629)
Top 5 Top6 Top7 Top8 \
Erdős–Rényi 0.2985 (1404) 0.2976 (959) 0.2972 (1564) 0.2970 (5511)
Barabási–Albert 0.5044 (17) 0.4975 (20) 0.4973 (21) 0.4904 (0)
Watts–Strogatz 0.2230 (829) 0.2228 (3714) 0.2221 (3710) 0.2221 (6534)
Top9 Top10
Erdős–Rényi 0.2968 (5046) 0.2968 (2864)
Barabási–Albert 0.4902 (27) 0.4891 (35)
Watts–Strogatz 0.2213 (3127) 0.2210 (6905)
Betweenness Centrality
# Top-10 betweenness-centrality nodes per graph.
r = {}
for name, G in graphs.items():
    ranking = sorted(nx.betweenness_centrality(G).items(),
                     key=lambda kv: kv[1], reverse=True)[:10]
    r[name] = [f"{score:.4f} ({node})" for node, score in ranking]
df = pd.DataFrame(r, index=["Top 1", "Top 2", "Top 3", "Top 4", "Top 5",
                            "Top6", "Top7", "Top8", "Top9", "Top10"]).T
print(df)
Top 1 Top 2 Top 3 Top 4 \
Erdős–Rényi 0.0011 (493) 0.0011 (1564) 0.0010 (5123) 0.0010 (6364)
Barabási–Albert 0.0358 (19) 0.0302 (16) 0.0302 (18) 0.0300 (15)
Watts–Strogatz 0.0031 (1629) 0.0031 (3714) 0.0029 (3710) 0.0028 (1405)
Top 5 Top6 Top7 Top8 \
Erdős–Rényi 0.0010 (6581) 0.0009 (419) 0.0009 (1404) 0.0009 (71)
Barabási–Albert 0.0259 (17) 0.0227 (21) 0.0213 (20) 0.0185 (35)
Watts–Strogatz 0.0026 (3246) 0.0026 (3243) 0.0026 (3659) 0.0025 (3908)
Top9 Top10
Erdős–Rényi 0.0009 (1134) 0.0009 (6500)
Barabási–Albert 0.0178 (24) 0.0160 (0)
Watts–Strogatz 0.0025 (190) 0.0025 (1956)
Eigenvector Centrality
# Top-10 eigenvector-centrality nodes per graph.
r = {}
for name, G in graphs.items():
    ranking = sorted(nx.eigenvector_centrality(G).items(),
                     key=lambda kv: kv[1], reverse=True)[:10]
    r[name] = [f"{score:.4f} ({node})" for node, score in ranking]
df = pd.DataFrame(r, index=["Top 1", "Top 2", "Top 3", "Top 4", "Top 5",
                            "Top6", "Top7", "Top8", "Top9", "Top10"]).T
print(df)
Top 1 Top 2 Top 3 Top 4 \
Erdős–Rényi 0.0250 (4845) 0.0249 (319) 0.0247 (493) 0.0245 (5863)
Barabási–Albert 0.1661 (19) 0.1590 (15) 0.1552 (16) 0.1477 (18)
Watts–Strogatz 0.0207 (2712) 0.0203 (1293) 0.0198 (3083) 0.0198 (2103)
Top 5 Top6 Top7 Top8 \
Erdős–Rényi 0.0236 (1404) 0.0231 (959) 0.0229 (1564) 0.0228 (5046)
Barabási–Albert 0.1403 (17) 0.1235 (20) 0.1234 (21) 0.1117 (0)
Watts–Strogatz 0.0198 (750) 0.0198 (2714) 0.0196 (1290) 0.0194 (2710)
Top9 Top10
Erdős–Rényi 0.0226 (2864) 0.0226 (5511)
Barabási–Albert 0.1099 (27) 0.1048 (25)
Watts–Strogatz 0.0194 (2715) 0.0193 (1297)
Page Rank
# Top-10 PageRank nodes per graph.
r = {}
for name, G in graphs.items():
    ranking = sorted(nx.pagerank(G).items(),
                     key=lambda kv: kv[1], reverse=True)[:10]
    r[name] = [f"{score:.4f} ({node})" for node, score in ranking]
df = pd.DataFrame(r, index=["Top 1", "Top 2", "Top 3", "Top 4", "Top 5",
                            "Top6", "Top7", "Top8", "Top9", "Top10"]).T
print(df)
Top 1 Top 2 Top 3 Top 4 \
Erdős–Rényi 0.0003 (4845) 0.0003 (319) 0.0003 (493) 0.0003 (5046)
Barabási–Albert 0.0024 (19) 0.0022 (16) 0.0022 (18) 0.0021 (15)
Watts–Strogatz 0.0002 (1316) 0.0002 (371) 0.0002 (3246) 0.0002 (190)
Top 5 Top6 Top7 Top8 \
Erdős–Rényi 0.0003 (5863) 0.0003 (6500) 0.0003 (424) 0.0003 (2864)
Barabási–Albert 0.0020 (17) 0.0018 (21) 0.0017 (20) 0.0017 (35)
Watts–Strogatz 0.0002 (3908) 0.0002 (2519) 0.0002 (6339) 0.0002 (3714)
Top9 Top10
Erdős–Rényi 0.0003 (5190) 0.0003 (5870)
Barabási–Albert 0.0016 (24) 0.0015 (27)
Watts–Strogatz 0.0002 (3570) 0.0002 (5551)
Comparing with the real network — the Watts–Strogatz graph shows the highest clustering coefficient, while the Erdős–Rényi and Barabási–Albert graphs show low clustering. Regarding degree distribution, Wiki-Vote is highly skewed, whereas the WS degree distribution is narrow and regular compared to real-world distributions.
Removing a high-centrality node in WS
import networkx as nx
import matplotlib.pyplot as plt
# Find the WS node with the highest clustering coefficient, highlight it,
# then redraw the graph after deleting it (layout positions reused so the
# two panels are comparable).
clustering = nx.clustering(WS)
highest_node = max(clustering, key=clustering.get)
highest_value = clustering[highest_node]
print(f"Removing node {highest_node} with clustering coefficient: {highest_value:.4f}")
pos = nx.spring_layout(WS, seed=42)
plt.figure(figsize=(12, 5))
# Left panel: original graph, target node in red.
plt.subplot(1, 2, 1)
plt.title("Original WS Graph (Highest Cluster Node Highlighted)")
colors = ['red' if v == highest_node else 'lightgray' for v in WS.nodes()]
sizes = [200 if v == highest_node else 50 for v in WS.nodes()]
nx.draw(WS, pos, node_color=colors, node_size=sizes,
        with_labels=False, edge_color='gray')
# Right panel: graph with the node removed.
WS_modified = WS.copy()
WS_modified.remove_node(highest_node)
plt.subplot(1, 2, 2)
plt.title("WS Graph After Removing Highest Clustering Node")
nx.draw(WS_modified, pos, node_color='skyblue', node_size=50,
        with_labels=False, edge_color='gray')
plt.tight_layout()
plt.show()
Removing node 1248 with clustering coefficient: 0.7576
Community Experiment
# Louvain community detection on G, coloring nodes by community membership.
# NOTE(review): relies on `size` and `pos` defined in earlier cells.
partition = nx.community.louvain_communities(
    G, resolution=1, threshold=1e-07, max_level=None, seed=None
)
num_communities = len(partition)
print(f"Number of communities detected (Louvain): {num_communities}")
# Fix: `colors` previously held raw node ids, so node colors were unrelated
# to the detected communities. Map each node to its community index instead.
community_of = {}
for idx, community in enumerate(partition):
    for member in community:
        community_of[member] = idx
colors = [community_of[node] for node in G.nodes()]
fig, ax = plt.subplots(figsize=(size, size - 4))
options = {"edgecolors": "tab:gray", "node_size": 15, "alpha": 0.75}
nx.draw_networkx_nodes(G, pos, node_color=colors, cmap=plt.cm.viridis, **options)
nx.draw_networkx_edges(G, pos, width=0.6, alpha=0.3)
plt.title("Louvain Community Detection")
plt.show()
Number of communities detected (Louvain): 47
Linear Threshold Model
First, finding the most influential nodes
def linear_threshold(G, nodes):
    """Run a Linear Threshold cascade from the given seed nodes.

    Each node draws a random activation threshold in [0, 1) and each edge a
    random weight in [0, 1). An inactive node activates once the summed
    weight of edges linking it to active neighbors reaches its threshold.
    Returns the set of active nodes at convergence.
    """
    # Random thresholds per node, then random weights per edge (draw order
    # matters for reproducibility under a fixed RNG seed).
    thresholds = {v: np.random.uniform(0, 1) for v in G.nodes}
    weights = {e: np.random.uniform(0, 1) for e in G.edges}
    active_nodes = set(nodes)
    changed = True
    while changed:
        changed = False
        # Snapshot of currently inactive nodes for this pass.
        for v in set(G.nodes) - active_nodes:
            # Look the weight up in both orientations, since each edge is
            # stored under a single (u, v) key.
            influence = sum(
                weights.get((u, v), 0) + weights.get((v, u), 0)
                for u in G.neighbors(v)
                if u in active_nodes
            )
            if influence >= thresholds[v]:
                active_nodes.add(v)
                changed = True
    return active_nodes
# Select 100 initial seed nodes uniformly at random.
seed_nodes = np.random.choice(G.nodes, size=100, replace=False)
# Fix: the function defined above is `linear_threshold`, not
# `linear_threshold_model` — the old call raised NameError.
final_active_nodes = linear_threshold(G, seed_nodes)
def show_threshold(final_active_nodes):
    """Draw G with activated nodes in red and the rest in blue."""
    node_colors = ["red" if v in final_active_nodes else "blue"
                   for v in G.nodes]
    fig, ax = plt.subplots(figsize=(size, size - 4))
    layout = nx.spring_layout(G, seed=42)
    # nodes
    opts = {"edgecolors": "tab:orange", "node_size": 15}
    nx.draw_networkx_nodes(G, layout, node_color=node_colors,
                           cmap=plt.cm.viridis, **opts)
    # edges
    nx.draw_networkx_edges(G, layout, width=0.6, alpha=0.3)
    plt.title("Linear Threshold Model")
    plt.show()
show_threshold(final_active_nodes)
def initialize_thresholds_and_weights(graph, threshold_range=(0, 1), weight_range=(0, 1)):
    """Draw a random threshold per node and a random weight per
    (node, neighbor) pair; returns (thresholds, weights)."""
    thresholds, weights = {}, {}
    for node in graph.nodes():
        thresholds[node] = np.random.uniform(*threshold_range)
        weights[node] = {}
        for nbr in graph.neighbors(node):
            weights[node][nbr] = np.random.uniform(*weight_range)
    return thresholds, weights
def linear_threshold_diffusion(graph, seeds, thresholds, weights):
    """Spread influence from `seeds` under the Linear Threshold Model.

    A node activates when the summed weight of its already-active neighbors
    reaches its threshold; returns the full set of active nodes.
    """
    active_nodes = set(seeds)
    frontier = set(seeds)
    while frontier:
        frontier = set()
        for node in graph.nodes():
            if node in active_nodes:
                continue
            pull = 0.0
            for nbr in graph.neighbors(node):
                if nbr in active_nodes:
                    pull += weights[node].get(nbr, 0)
            if pull >= thresholds[node]:
                frontier.add(node)
        # Commit this round's activations only after the full sweep, so all
        # nodes in a round see the same active set.
        active_nodes.update(frontier)
    return active_nodes
def plot_activation(graph, active_nodes, title="Linear Threshold Model"):
    """Visualize active (red) versus inactive (blue) nodes."""
    layout = nx.spring_layout(graph, seed=42)
    fig, ax = plt.subplots(figsize=(10, 6))
    nx.draw_networkx_edges(graph, layout, width=0.6, alpha=0.3)
    inactive = set(graph.nodes()) - active_nodes
    for nodelist, color, label in ((active_nodes, 'red', 'Active'),
                                   (inactive, 'blue', 'Inactive')):
        nx.draw_networkx_nodes(graph, layout,
                               nodelist=nodelist,
                               node_color=color,
                               node_size=10,
                               label=label)
    plt.title(title)
    plt.legend()
    plt.axis("off")
    plt.show()
# === Main Flow ===
# 1) random thresholds/weights, 2) seed with the 10 top-PageRank nodes,
# 3) run the cascade, 4) plot the outcome.
thresholds, weights = initialize_thresholds_and_weights(G)
page_dict = nx.pagerank(G)
top_nodes = sorted(page_dict, key=page_dict.get, reverse=True)[:10]
final_active_nodes = linear_threshold_diffusion(G, top_nodes, thresholds, weights)
plot_activation(G, final_active_nodes, title="LTM with Top PageRank Nodes")
The Linear Threshold Model effectively demonstrates how influence spreads through the network, compared to random activation or pure high-degree selection. Seeding with the top PageRank nodes of the Wiki-Vote graph activates a large portion of the network.
Reference
J. Leskovec, D. Huttenlocher, J. Kleinberg. Signed Networks in Social Media. CHI 2010. J. Leskovec, D. Huttenlocher, J. Kleinberg. Predicting Positive and Negative Links in Online Social Networks. WWW 2010. Anon, (n.d.).